Diversity indicators using OBIS data
This notebook shows how to calculate diversity indicators layers based on OBIS data. The indicators are calculated for a discrete global grid.
Read the occurrence data
Exports of all OBIS presence records are available from https://obis.org/manual/access/. Extract the archive, read the location and species identifier columns, and summarize by location:
library(readr)
library(dplyr)
if (!file.exists("occurrence.rds")) {
occ <- read_csv("../occurrence.csv", col_type = cols_only(decimallongitude = col_double(), decimallatitude = col_double(), species = col_character())) %>%
group_by(decimallongitude, decimallatitude, species) %>%
summarize(records = n()) %>%
as_tibble()
saveRDS(occ, "occurrence.rds")
} else {
occ <- readRDS("occurrence.rds")
}Create a discrete global grid
Create an ISEA discrete global grid of resolution 9 using the dggridR package:
Here’s on overview of all possible resolutions:
## res cells area_km spacing_km cls_km
## 1 0 12 51006562.1724088713527 7053.6524314108 8199.5003701020
## 2 1 32 17002187.3908029571176 4072.4281300451 4678.9698717297
## 3 2 92 5667395.7969343187287 2351.2174771369 2691.2520709129
## 4 3 272 1889131.9323114396539 1357.4760433484 1551.8675487723
## 5 4 812 629710.6441038132180 783.7391590456 895.6018416484
## 6 5 2432 209903.5480346044060 452.4920144495 517.0049969031
## 7 6 7292 69967.8493448681256 261.2463863485 298.4793231872
## 8 7 21872 23322.6164482893764 150.8306714832 172.3244908961
## 9 8 65612 7774.2054827631255 87.0821287828 99.4910857272
## 10 9 196832 2591.4018275877088 50.2768904944 57.4411078487
## 11 10 590492 863.8006091959029 29.0273762609 33.1636203580
## 12 11 1771472 287.9335363986343 16.7589634981 19.1470215381
## 13 12 5314412 95.9778454662114 9.6757920870 11.0545373459
## 14 13 15943232 31.9926151554038 5.5863211660 6.3823399790
## 15 14 47829692 10.6642050518013 3.2252640290 3.6848456792
## 16 15 143489072 3.5547350172671 1.8621070553 2.1274466399
## 17 16 430467212 1.1849116724224 1.0750880097 1.2282818893
## 18 17 1291401632 0.3949705574741 0.6207023518 0.7091488792
## 19 18 3874204892 0.1316568524914 0.3583626699 0.4094272963
## 20 19 11622614672 0.0438856174971 0.2069007839 0.2363829597
## 21 20 34867844012 0.0146285391657 0.1194542233 0.1364757654
## 22 21 104603532032 0.0048761797219 0.0689669280 0.0787943199
## 23 22 313810596092 0.0016253932406 0.0398180744 0.0454919218
## 24 23 941431788272 0.0005417977469 0.0229889760 0.0262647733
## 25 24 2824295364812 0.0001805992490 0.0132726915 0.0151639739
## 26 25 8472886094432 0.0000601997497 0.0076629920 0.0087549244
## 27 26 25418658283292 0.0000200665832 0.0044242305 0.0050546580
## 28 27 76255974849872 0.0000066888611 0.0025543307 0.0029183081
## 29 28 228767924549612 0.0000022296204 0.0014747435 0.0016848860
## 30 29 686303773648832 0.0000007432068 0.0008514436 0.0009727694
## 31 30 2058911320946492 0.0000002477356 0.0004915812 0.0005616287
Then assign cell numbers to the occurrence data:
Calculate metrics
The following function calculates the number of records, species richness, Simpson index, Shannon index, Hurlbert index (n = 50), and Hill numbers for each cell.
library(gsl)
calc <- function(df, esn = 50) {
t1 <- df %>%
group_by(cell, species) %>%
summarize(ni = sum(records))
t2 <- t1 %>%
group_by(cell) %>%
mutate(n = sum(ni))
t3 <- t2 %>%
group_by(cell, species) %>%
mutate(
hi = -(ni/n*log(ni/n)),
si = (ni/n)^2,
qi = ni/n,
esi = case_when(
n-ni >= esn ~ 1-exp(lngamma(n-ni+1)+lngamma(n-esn+1)-lngamma(n-ni-esn+1)-lngamma(n+1)),
n >= esn ~ 1
)
)
t4 <- t3 %>%
group_by(cell) %>%
summarize(
n = sum(ni),
sp = n(),
shannon = sum(hi),
simpson = sum(si),
maxp = max(qi),
es = sum(esi)
)
result <- t4 %>%
mutate(
hill_1 = exp(shannon),
hill_2 = 1/simpson,
hill_inf = 1/maxp
)
return(result)
}Perform the calculation on species level data:
## # A tibble: 117,613 x 10
## cell n sp shannon simpson maxp es hill_1 hill_2 hill_inf
## * <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 205982 1336 5.21 0.0137 0.0540 39.0 183. 73.0 18.5
## 2 2 461 49 3.20 0.0567 0.124 21.5 24.6 17.6 8.09
## 3 3 279 38 3.08 0.0616 0.147 20.4 21.8 16.2 6.80
## 4 4 102 34 3.18 0.0586 0.157 24.7 24.0 17.1 6.38
## 5 5 217 78 4.04 0.0230 0.0599 34.5 56.8 43.6 16.7
## 6 6 7 6 1.75 0.184 0.286 NA 5.74 5.44 3.5
## 7 7 57 23 2.80 0.0779 0.140 21.3 16.4 12.8 7.12
## 8 8 63 15 2.11 0.194 0.381 13.8 8.22 5.15 2.62
## 9 9 228 63 3.15 0.108 0.285 23.8 23.2 9.23 3.51
## 10 10 484 90 3.36 0.126 0.343 26.5 28.8 7.91 2.92
## # … with 117,603 more rows
Add cell geometries to the metrics table:
library(sf)
grid <- dgearthgrid(dggs, frame = FALSE, wrapcells = FALSE)
grid_sf <- grid %>%
st_as_sf() %>%
st_wrap_dateline(options = c("WRAPDATELINE=YES", "DATELINEOFFSET=230"))
grid_sf$cell <- names(grid)
metrics <- merge(metrics, grid_sf, by.x = "cell", by.y = "cell") %>%
filter(st_intersects(geometry, st_as_sfc("SRID=4326;POLYGON((-180 85,180 85,180 -85,-180 -85,-180 85))"), sparse = FALSE))Let’s check the results by creating a Shannon index map:
library(rnaturalearth)
library(rnaturalearthdata)
library(viridis)
library(ggplot2)
world <- ne_countries(scale = "medium", returnclass = "sf")
ggplot() +
geom_sf(data = metrics, aes_string(fill = "shannon", geometry = "geometry"), lwd = 0) +
scale_fill_viridis(option = "inferno", na.value = "white", name = "Shannon index") +
geom_sf(data = world, fill = "#dddddd", color = NA) +
theme(panel.grid.major.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.minor.y = element_blank(), panel.background = element_blank(), axis.text.x = element_blank(), axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank()) + xlab("") + ylab("") +
coord_sf()Output
Shapefile
Write the metrics table to a shapefile:
Map image
This create a high resolution PNG map image.
metrics <- st_sf(metrics, sf_column_name = "geometry")
st_crs(metrics) <- 4326
ggplot() +
geom_sf(data = metrics, aes_string(fill = "n", color = "n", geometry = "geometry"), lwd = 0.04) +
scale_color_viridis(option = "inferno", na.value = "white", name = "Number of records", trans = "log10") +
scale_fill_viridis(option = "inferno", na.value = "white", name = "Number of records", trans = "log10") +
geom_sf(data = world, fill = "#dddddd", color = "#666666", lwd = 0.1) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.background = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none"
) +
xlab("") + ylab("") +
coord_sf(crs = "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" )